{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "7563a171",
   "metadata": {},
   "source": []
  },
  {
   "cell_type": "markdown",
   "id": "d006b2ea-9dfe-49c7-88a9-a5a0775185fd",
   "metadata": {},
   "source": [
    "# Exercise - week 2: German translator\n",
    "\n",
    "This should include a Gradio UI, streaming, use of the system prompt to add expertise, and the ability to switch between models. Bonus points if you can demonstrate use of a tool!\n",
    "\n",
    "The assistant will transform your spoken English to text, then translate it German and speak it out. The image on the UI is just decoration. This exercise was created on MacOS, Python 3.13."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "a07e7793-b8f5-44f4-aded-5562f633271a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Install first PortAudio, in MacOS\n",
    "# brew install portaudio\n",
    "\n",
    "\n",
    "!pip install openai speechrecognition pyaudio\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "id": "dcae50aa",
   "metadata": {},
   "outputs": [],
   "source": [
    "# imports\n",
    "\n",
    "import os\n",
    "import json\n",
    "from dotenv import load_dotenv\n",
    "from openai import OpenAI\n",
    "import gradio as gr"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1796b554",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Initialization\n",
    "\n",
    "load_dotenv()\n",
    "\n",
    "openai_api_key = os.getenv('OPENAI_API_KEY')\n",
    "if openai_api_key:\n",
    "    print(f\"OpenAI API Key exists and begins {openai_api_key[:8]}\")\n",
    "else:\n",
    "    print(\"OpenAI API Key not set\")\n",
    "    \n",
    "MODEL = \"gpt-4o-mini\"\n",
    "openai = OpenAI()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "id": "c5caad24",
   "metadata": {},
   "outputs": [],
   "source": [
    "system_message = \"\"\"You are a highly skilled language translator specializing in translating English text to German. \n",
    "Your task is to accurately translate any English text provided by the user into German. \n",
    "Ensure that the translations are grammatically correct and contextually appropriate. \n",
    "If the user provides a phrase, sentence, or paragraph in English, respond with the equivalent translation in German.\"\"\" "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "id": "aca69563",
   "metadata": {},
   "outputs": [],
   "source": [
    "import speech_recognition as sr\n",
    "from pydub import AudioSegment\n",
    "from pydub.playback import play\n",
    "import base64\n",
    "from io import BytesIO\n",
    "from PIL import Image\n",
    "\n",
    "\n",
    "def recognize_speech(audio_file):\n",
    "    recognizer = sr.Recognizer()\n",
    "    with sr.AudioFile(audio_file) as source:\n",
    "        audio = recognizer.record(source)\n",
    "    try:\n",
    "        text = recognizer.recognize_google(audio)\n",
    "        return text\n",
    "    except sr.UnknownValueError:\n",
    "        return \"Google Speech Recognition could not understand audio\"\n",
    "    except sr.RequestError as e:\n",
    "        return f\"Could not request results from Google Speech Recognition service; {e}\"\n",
    "\n",
    "\n",
    "def get_chatgpt_response(message):\n",
    "    response = openai.chat.completions.create(\n",
    "        model=MODEL,\n",
    "        messages = \n",
    "            [{\"role\": \"system\", \"content\": system_message},\n",
    "            {\"role\": \"user\", \"content\": message}],\n",
    "        max_tokens=150\n",
    "    )\n",
    "    return response.choices[0].message.content.strip()\n",
    "\n",
    "def process_audio(audio_file):\n",
    "    text = recognize_speech(audio_file)\n",
    "    if text:\n",
    "        response = get_chatgpt_response(text)\n",
    "        talker(response)\n",
    "        return response\n",
    "    return \"Could not recognize speech.\"\n",
    "\n",
    "def talker(message):\n",
    "    response = openai.audio.speech.create(\n",
    "      model=\"tts-1\",\n",
    "      voice=\"onyx\",    # Also, try replacing onyx with alloy\n",
    "      input=message\n",
    "    )\n",
    "    \n",
    "    audio_stream = BytesIO(response.content)\n",
    "    audio = AudioSegment.from_file(audio_stream, format=\"mp3\")\n",
    "    play(audio)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1118141",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Create Gradio interface\n",
    "\n",
    "# some image decoration to UI, just a static picture\n",
    "image_path =\"week2-exercise-translator-berlin.webp\"\n",
    "\n",
    "with gr.Blocks() as ui:\n",
    "    gr.Interface(\n",
    "        fn=process_audio,\n",
    "        inputs=gr.Audio(type=\"filepath\", label=\"Speak English. German translation in a moment:\"),\n",
    "        outputs=\"text\",\n",
    "        live=True, \n",
    "    )\n",
    "    gr.Image(value=image_path, label=\"Das ist Berlin\")\n",
    "    \n",
    "ui.launch(inbrowser=True)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c1284da5",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv313",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.13.2"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
